# SET DIRECTORY TO SOURCE FILE 

library(plyr)
library(tidyverse)
require(miceadds)
require(ggplot2)
library(plyr)

dat_t <- read.csv('../../data/cleaned/heuristics_cleaned_long.csv', stringsAsFactors = FALSE)

# What percent correct in control group?
mean(dat_t$correct[dat_t$indirect == 0 & dat_t$direct == 0], na.rm = TRUE)

# Regress whether the respondent guessed correctly on whether they were cued, with issue area fixed effects and respondent-level clustered SEs.
# Respondents in the treatment group are only randomly assigned to one rating among the ratings that it makes sense to show them for their MoC (because the MC has both the rating and the corresponding vote). Some MCs are missing different numbers than others, meaning that respondent's probability of assignment to each issue depends on how many other issues there. E.g., if you got issue A and your MC has 3 other issues, your prob of assignment is 1/4, but if your MC has 7 other issues, your prob of assignment is 1/8. So we need fixed effects for the number of issues to which you were assigned.
main.results <- summary(miceadds::lm.cluster(data = dat_t,
                             formula = correct ~ indirect + direct + 
                               factor(issue) + factor(num_eligible_ratings),
                             cluster = 'id'))
main.results
main.result.coefs <- main.results[2:3,1:2]

# remove NARAL, Parks, NRA
dat_no_obvious <- filter(dat_t, !randomrating_house_vote_id %in% c(55735, 58622, 60587))
summary(miceadds::lm.cluster(data = dat_no_obvious,
                             formula = correct ~ indirect + direct + 
                               factor(issue) + factor(num_eligible_ratings),
                             cluster = 'id'))

# We will test whether there is a positive coefficient on direct, our main effect, or indirect, our spillover effect.

# Run by PK scale level
models <- dlply(dat_t, "knowledgescale", function(df) 
  lm(correct ~ indirect + direct + factor(num_eligible_ratings), data = df))

# Apply coef to each model and return a data frame
get.coefs <- function(i) {
  pk.level <- names(models)[i]
  lm.obj <- models[[i]]
  coefs <- summary(lm.obj)$coefficients
  return(c(indirect.est = coefs[2,1],
           indirect.se = coefs[2,2],
           direct.est = coefs[3,1],
           direct.se = coefs[3,2],
           pk.level = pk.level))
}
estimates <- adply(1:length(models), 1, get.coefs)
for(i in 1:ncol(estimates)) estimates[,i] <- as.numeric(estimates[,i])
estimates

# are we up against a ceiling for PK?
dat_t %>% group_by(knowledgescale) %>% summarize(correctperc = mean(correct))

# results by issue
# Break up data by issue, then fit the specified model to each piece and
# return a list
models <- dlply(dat_t, "issue", function(df) 
  lm(correct ~ indirect + direct + factor(num_eligible_ratings), data = df))

# Apply coef to each model and return a data frame
get.coefs <- function(i) {
  issue.id <- names(models)[i]
  lm.obj <- models[[i]]
  coefs <- summary(lm.obj)$coefficients
  return(c(indirect.est = coefs[2,1],
           indirect.se = coefs[2,2],
           direct.est = coefs[3,1],
           direct.se = coefs[3,2],
           house_vote_id = issue.id))
}
estimates <- adply(1:length(models), 1, get.coefs)
for(i in 1:ncol(estimates)) estimates[,i] <- as.numeric(estimates[,i])
estimates <- merge(estimates, read.csv('../../data/Ancillary Data/vote_and_group_info_map.csv', stringsAsFactors = FALSE))
head(estimates)

# add the average estimates
overall.ests <- data.frame(direct.est = main.result.coefs[2,1],
                           direct.se = main.result.coefs[2,2],
                           indirect.est = main.result.coefs[1,1],
                           indirect.se = main.result.coefs[1,2],
                           sig_name = 'Average Across All')

# plot estimates
estimates <- estimates[order(estimates$direct.est),]
estimates <- plyr::rbind.fill(overall.ests, estimates)

estimates$sig_name <- factor(estimates$sig_name, ordered = TRUE, levels = estimates$sig_name)
g <- ggplot(estimates, aes(x = sig_name)) +
  geom_hline(yintercept=0, lty=2, lwd=1, colour="grey50") +
  geom_point(aes(y = direct.est)) +
  geom_errorbar(aes(ymin = direct.est - 1.96 * direct.se, ymax = direct.est + 1.96 * direct.se), 
                lwd = 1, width = 0) +
  theme_bw() + coord_flip() +ylim(-.25, .45) +
  ylab('Treatment Effect on Accurate Perception of\nMC Vote on Relevant Issue (0/1)') + xlab('Interest Group Name')
ggsave('../../figures/fig3a.pdf', g, scale = .8, width = 9, height = 5)
weighted.mean(estimates$direct.est, 1/(estimates$direct.se^2))

